home *** CD-ROM | disk | FTP | other *** search
- /******************************************************************************/
- #include "sgmlincl.h" /* #INCLUDE statements for SGML parser. */
- /******************************************************************************/
- #define GI (tags[ts].tetd->etdgi+1) /* GI of current element; not referenced by the functions shown in this file. */
- #define NEWGI (newetd->etdgi+1) /* GI of new tag; not referenced by the functions shown in this file. */
- /******************************************************************************/
- #define STATUS (*statuspt) /* Token status, read and written through the enclosing function's statuspt. */
- #define RCEND 1 /* No more tokens: end element and retry GI. */
- #define RCREQ 2 /* Required GI must precede proposed GI. */
- #define RCMISS 3 /* GI invalid: not element end; no required GI. */
- #define RCHIT 4 /* GI is the one expected next. */
- #define RCMEX 5 /* GI invalid: minus exception. */
- #define RCHITMEX 6 /* Minus exception whose GI also matched a model token that is neither optional nor in an OR group (see context). */
- #define RCPEX 7 /* GI is valid solely because of a plus exception. */
- #define RCNREQ 8 /* Token is not required; can retry invalid GI. */
- #define PEX -1 /* GI is a plus exception and not a minus; not referenced by the functions shown in this file. */
- #define M pos[0].g /* Index of current token in model. */
- #define P pos[0].t /* Index of current group in pos. */
- #define G pos[P].g /* Index of current group in model. */
- #define T pos[P].t /* Index (1-based) of current token in its group. */
- #define H pos[P].h /* Hit bits for current group's tokens (bit T-1 on = token T hit). */
- #define GHDR mod[G] /* Current group header. */
- #define TOKEN mod[M] /* Current token. */
- #define TTYPE (GET(TOKEN.ttype, TTMASK)) /* Token type of current token. */
- #define TOCC (GET(TOKEN.ttype, TOREP)) /* Occurrence for current token. */
- #define GTYPE (GET(GHDR.ttype, TTMASK)) /* Token type of current group. */
- #define GOCC (GET(GHDR.ttype, TOREP)) /* Occurrence for current group. */
- #define GNUM GHDR.tu.tnum /* Number of tokens in current grp. */
- /******************************************************************************/
- static long l1; /* Intermediate variable for hit selector; overwritten by every TOKENHIT. */
- #define TOKENHIT (l1 = 1L<<(T-1), H&l1) /* Nonzero=current token was hit; 0=not. Leaves the token's bit mask in l1. */
- /******************************************************************************/
- /* CONTEXT: Determine whether a GI is valid in the present structural context.
- On entry, pos points to the model token to be tested against the GI.
- The model is walked from that position: nested groups are opened,
- and gi is compared with each candidate token until one of the
- results below is reached. pos and STATUS are updated in place.
- If STATUS is already RCMISS or RCEND on entry, it is returned
- without any token being tested.
- Returns:
- RCHIT gi matches the current token and mexts<=0.
- RCMEX mexts>0 and either gi matches a token that is optional
- or belongs to an OR group, or gi does not match a token
- that is not contextually required.
- RCHITMEX mexts>0 and gi matches a token that is neither optional
- nor in an OR group.
- RCPEX mexts==-1 and gi does not match the token under test
- (STATUS is set to RCPEX as well).
- RCREQ A different token is required first; its ETD is stored in
- nextetd and newtoken() is run as if that token had been hit.
- RCEND The element has ended (no more tokens to test).
- RCMISS gi is totally invalid here.
- Tstart, nextetd, ctrace and tracegi are not declared in this file.
- TO DO: Save allowed GIs for an error message on an RCMISS.
- Support a "query" mode (what is allowed now?) by working
- with a copy of pos.
- */
- int context(gi, mod, pos, statuspt, mexts)
- struct etd *gi; /* ETD of new GI. */
- struct thdr mod[]; /* Model of current open element. */
- struct mpos pos[]; /* Position in open element's model. */
- UNCH *statuspt; /* Token status: RCHIT RCMISS RCEND RCREQ RCNREQ*/
- int mexts; /* >0=stack level of minus grp; -1=plus; 0=none.*/
- {
- UNCH toccsv, gtypesv; /* Save token's TOCC and GTYPE in case grp ends.*/
-
- Tstart = T; /* Save starting token for AND group testing. */
- while (STATUS!=RCMISS && STATUS!=RCEND) {
- #ifndef FINAL
- if (ctrace) tracegi("CONTEXT", gi, mod, pos, (int)Tstart);
- #endif
- while (TTYPE==TTOR || TTYPE==TTSEQ || TTYPE==TTAND) { /* Current token is a group header: open the group. */
- pos[P+1].g = M++; pos[++P].t = 1; H = 0; /* New level: remember header index, start at token 1, no hits. */
- Tstart = T; /* Save starting token for AND group testing. */
- #ifndef FINAL
- if (ctrace) tracegi("OPENGRP", gi, mod, pos, (int)Tstart);
- #endif
- }
- STATUS = (char)tokenreq(gi, mod, pos); /* RCREQ or RCNREQ for the current token. */
- #ifndef FINAL
- if (ctrace) tracegi("STATUS", gi, mod, pos, (int)Tstart);
- #endif
- if (gi==TOKEN.tu.thetd) { /* Hit in model. */
- STATUS = (char)RCHIT;
- gtypesv = GTYPE; toccsv = TOCC; /* Capture before newtoken() moves the position. */
- newtoken(mod, pos, statuspt);
- return(mexts<=0 ? RCHIT : (gtypesv==TTOR || BITON(toccsv, TOPT))
- ? RCMEX : RCHITMEX);
- }
- if (mexts==-1) return((int)(STATUS = RCPEX)); /* Hit in plus grp. */
- if (STATUS==RCREQ) {
- STATUS = RCHIT; /* Treat the required token as hit so newtoken() steps past it. */
- nextetd = TOKEN.tu.thetd; /* Tell the caller which element is required. */
- newtoken(mod, pos, statuspt);
- return(RCREQ);
- }
- /* else if (STATUS==RCNREQ) */
- if (mexts>0) return(RCMEX); /* Miss on a non-required token while a minus exception applies. */
- newtoken(mod, pos, statuspt); /* Miss: find the next token to retry gi against. */
- }
- return((int)STATUS); /* RCMISS or RCEND. */
- }
- /******************************************************************************/
- /* ECONTEXT: Determine whether the current element can be ended, or whether
- non-optional tokens remain at the current level or higher.
- Returns 1 if element can be ended, or 0 if tokens remain.
- On entry, STATUS==RCEND if there are no tokens left; if not,
- pos points to the next model token to be tested.
- Side effects: pos and STATUS are updated as groups are ended and
- optional tokens are stepped over. nextetd is set to the ETD of
- the token at which testing stopped, or to 0 when the outermost
- level (P<=1) was reached or testing stopped with RCMISS at an
- optional token.
- Tstart, nextetd, ctrace and traceend are not declared in this file.
- TO DO: Support a "query" mode (what is required now?) by working
- with a copy of pos.
- */
- int econtext(mod, pos, statuspt)
- struct thdr mod[]; /* Model of current open element. */
- struct mpos pos[]; /* Position in open element's model. */
- UNCH *statuspt; /* Token status: RCHIT RCMISS RCEND RCREQ RCNREQ*/
- {
- unsigned next; /* Position in AND group of next testable token.*/
-
- Tstart = T; /* Save starting token, as context() does. */
- #ifndef FINAL
- if (ctrace) traceend("ECONT", mod, pos, 0, 0, (int)Tstart);
- #endif
- if (P<=1) {nextetd = 0; return(TOKENHIT || BITON(TOCC, TOPT));} /* Outermost level: can end only if its token was hit or is optional. */
- else nextetd = TOKEN.tu.thetd;
- while (STATUS!=RCMISS && STATUS!=RCEND) {
- STATUS = (char)testend(mod, pos, 0, 0); /* End every group that can end (end-element test, optional AND members ignored). */
- #ifndef FINAL
- if (ctrace) traceend("ECONTEND", mod, pos, 0, 0, (int)Tstart);
- #endif
- nextetd = P<=1 ? 0 : TOKEN.tu.thetd;
- if (STATUS==RCEND) return(1);
- if (P<=1) return(TOKENHIT || BITON(TOCC, TOPT));
- if (STATUS==RCMISS) {
- if (BITON(TOCC, TOPT)) nextetd = 0;
- return(0);
- }
- if (!tokenopt(mod, pos)) return(0); /* A token that is not contextually optional remains. */
-
- STATUS = RCNREQ; /* Current token can be skipped: advance to the next one. */
- if (GTYPE!=TTAND) ++T; /* T!=GNUM or group would have ended. */
- else T = (char)(((next = (UNS)offbit(H, (int)T, GNUM))!=0) ?
- next : offbit(H, 0, GNUM)); /* AND group: next unhit member after T, else first unhit member. */
-
- M = G + grpsz(&GHDR, (int)T-1) + 1; /* Model index of member T: header, plus everything spanned by members 1..T-1, plus one. */
- #ifndef FINAL
- if (ctrace) traceend("ECONTNEW", mod, pos, 0, 0, (int)Tstart);
- #endif
- }
- /* Every path through the loop body either returns or leaves STATUS==RCNREQ,
- so this point is reached only when STATUS was RCMISS or RCEND on entry. */
- if (STATUS==RCMISS) {
- if (BITON(TOCC, TOPT)) nextetd = 0;
- return(0);
- }
- return(1); /* STATUS==RCEND */
- }
- /******************************************************************************/
- /* NEWTOKEN: Find the next token to test. Nothing is returned; the result
- is reported by updating pos and by setting STATUS:
- RCEND if element has ended (no more tokens to test);
- RCNREQ if non-required new token was found;
- RCHIT if a hit token was repeated (now non-required);
- and RCMISS if a new token can't be found because current token
- (which was not hit) was neither unconditionally required nor
- optional.
- On entry, STATUS==RCHIT means the current token was just hit.
- NOTE(review): earlier documentation also listed RCREQ ("required new
- token was found"), but no statement in this function assigns RCREQ
- and testend() does not return it; confirm that callers do not
- expect it.
- Tstart, ctrace and traceend are not declared in this file.
- */
- VOID newtoken(mod, pos, statuspt)
- struct thdr mod[]; /* Model of current open element. */
- struct mpos pos[]; /* Position in open element's model. */
- UNCH *statuspt; /* Token status: RCHIT RCMISS RCEND RCREQ RCNREQ*/
- {
- unsigned nextand = 0; /* Position in AND group of next testable token.*/
- UNCH Psave = 0; /* For testing whether group ended. */
- int rc = 0; /* Return code: RCNREQ RCHIT RCMISS RCEND */
- int currhit = (STATUS==RCHIT); /* 1=current GI hit; 0=not. */
-
- /* If the GI was a hit, turn on the hit bit and set the status to
- assume that the token to be tested against the next GI will
- be non-required. If the current token is repeatable, exit so
- it will stand as the next token to test.
- */
- if (STATUS==RCHIT) {
- BITNON(H, T); STATUS = RCNREQ;
- if (BITON(TOCC, TREP)) return;
- }
- /* At this point, we must determine the next token to test:
- either against the next GI, if this one was a hit, or
- against the same GI if conditions permit a retry.
- To find the next token, we must first end the current group,
- if possible, and any we can that contain it.
- If the outermost group was a hit and is repeatable, or
- if the element has ended, we exit now.
- If it hasn't ended, or was optional and ended with a miss,
- we can retry the GI against the next token.
- */
- if ((STATUS = (char)testend(mod, pos, 1, 1))!=RCNREQ) return;
-
- /* At this point, the "current token" is either the original one,
- or the token for the highest level unhit group that it ended.
- We will retry a missed GI, by testing it against the next
- token, if the current token:
- 1. Is optional;
- 2. Was hit (i.e., because it is repeatable and was hit by a
- previous GI or because it is a hit group that just ended);
- 3. Is in an AND or OR group and is not the last testable token.
-
- It will be the next sequential one (unhit one, in an AND group);
- if there are none left, use the first unhit token in the group.
- In either case, set M to correspond to the new T.
- */
- retest:
- #ifndef FINAL
- if (ctrace) traceend("RETEST", mod, pos, (int)nextand, 1, (int)Tstart);
- #endif
- if (GTYPE==TTAND) nextand = offbit(H, (int)T, GNUM); /* Next unhit AND member after T; 0 if there is none. */
- if ( BITON(TOCC, TOPT)
- || TOKENHIT
- || GTYPE==TTOR /* T!=GNUM or group would have ended. */
- || nextand ) {
- if (GTYPE!=TTAND) ++T; /* T!=GNUM or group would have ended. */
- else T = (char)(nextand ? nextand : offbit(H, 0, GNUM)); /* Wrap to the first unhit member when none follows T. */
- M = G + grpsz(&GHDR, (int)T-1) + 1; /* Model index of member T: header, plus everything spanned by members 1..T-1, plus one. */
- /* If AND group wrapped, it can end if all non-optionals were hit. */
- if (GTYPE==TTAND && T==Tstart && !currhit) {
- Psave = P;
- rc = testend(mod, pos, 0, 1);
- if (Psave!=P) {if ((STATUS = (char)rc)==RCNREQ) goto retest;} /* A group ended: adopt testend's status and retry while it is RCNREQ. */
- else STATUS = RCMISS; /* No group ended: every member was tried, so the GI is a miss. */
- }
- }
- else STATUS = RCMISS;
- #ifndef FINAL
- if (ctrace) traceend("NEWTOKEN", mod, pos, (int)nextand, 1, (int)Tstart);
- #endif
- return;
- }
- /******************************************************************************/
- /* TESTEND: End the current group, if possible, and any that it is nested in.
- The current token will either be a group header, or some token
- that could not end its group. pos is updated in place: each
- ended group is popped (M is set to its header and P is
- decremented), and Tstart is reset to the new current T.
- Returns:
- RCNREQ the (possibly new) current group could not be ended;
- RCHIT a group ended with all it needed hit, newtknsw is set,
- and that group is repeatable (TREP);
- RCEND the outermost level (P<=1) was reached and either a
- hit group ended there, or an unhit group ended there
- whose token is optional or was already hit;
- RCMISS the outermost level was reached by ending an unhit
- group whose token is neither optional nor hit.
- NOTE(review): an earlier description said this returns 1 if the
- current token is repeatable and 0 if not; the code returns the
- RC codes listed above.
- Tstart, ctrace and traceend are not declared in this file.
- */
- int testend(mod, pos, andoptsw, newtknsw)
- struct thdr mod[]; /* Model of current open element. */
- struct mpos pos[]; /* Position in open element's model. */
- int andoptsw; /* 1=test optional AND members; 0=ignore. */
- int newtknsw; /* 1=new token test; 0=end element test. */
- {
- int rc = 0; /* Return code: RCNREQ RCHIT RCMISS RCEND */
-
- while (!rc) { /* All RC codes are nonzero, so the loop runs until one is chosen. */
- #ifndef FINAL
- if (ctrace) traceend("TRACEEND", mod, pos, rc, andoptsw, (int)Tstart);
- #endif
- /* TESTMISS:
- If we've hit no tokens yet in the current group, and
- the current token is the last unhit one in the group we can test,
- we will end the group (it may never really have started!)
- because we might be able to try the token that follows it.
- In any group, a token is the last testable unhit token if it
- is the last sequential one, as the GI was already tested against
- the preceding unhit tokens. In addition,
- in a SEQ group, it is the last testable unhit token if it isn't
- optional, because we can't skip past it to the following ones.
- If we end the group, before popping the level, set M to G, as this
- level's group header will be the next level's current token.
- */
- if (H==0 && (T==(char)GNUM || GTYPE==TTSEQ && BITOFF(TOCC, TOPT))) {
- M = G; --P; Tstart = T; /* Pop: the group header becomes the current token of the outer level. */
- if (P<=1) {
- if (BITON(TOCC, TOPT) || TOKENHIT) rc = RCEND;
- else rc = RCMISS;
- }
- continue;
- }
- /* TESTHIT:
- See if we've hit all the non-optional tokens in the group.
- If so, pop to the previous level and set the group's hit bit.
- If we were called from NEWTOKEN we are trying to find the token
- to test against the next start-tag, so if the group is repeatable,
- process it again. (If not, we were called from ECONTEXT and
- are testing whether the element can be ended.)
- Otherwise, if we are at the first level, the element is over.
- */
- if ( GTYPE==TTOR && TOKENHIT
- || GTYPE==TTSEQ && T==(char)GNUM && (TOKENHIT || BITON(TOCC, TOPT))
- || GTYPE==TTAND && allhit(&GHDR, H, 0, andoptsw) ) {
- M = G; --P; BITNON(H, T); Tstart = T; /* Pop, then mark the ended group as hit in the outer level. NOTE(review): BITNON is given the index T while TOKENHIT tests 1L<<(T-1); confirm BITNON's definition. */
- if (newtknsw && BITON(TOCC, TREP)) rc = RCHIT;
- else if (P<=1) rc = RCEND;
- /* Else loop to test new outer group. */
- }
- else rc = RCNREQ; /* No group ended this time, so return. */
- }
- #ifndef FINAL
- if (ctrace) traceend("ENDFOUND", mod, pos, rc, andoptsw, (int)Tstart);
- #endif
- return(rc);
- }
- /******************************************************************************/
- /* TOKENOPT: Return 1 if current token is contextually optional;
- otherwise, return 0.
- */
- int tokenopt(mod, pos)
- struct thdr mod[]; /* Model of current open element. */
- struct mpos pos[]; /* Position in open element's model. */
- {
- #ifndef FINAL
- if (ctrace) traceend("TOKENOPT", mod, pos, 0, 0, (int)Tstart);
- #endif
- return( BITON(TOCC, TOPT)/* Inherently optional. */
- || TOKENHIT /* Was hit (handles "plus" suffix case). */
- || H==0 && groupopt(mod, pos) );/* In optional group with no hits. */
- }
- /******************************************************************************/
- /* GROUPOPT: Temporarily makes the current group be the current token so that
- TOKENOPT() can be applied to it. Returns the value returned
- by TOKENOPT.
- */
- int groupopt(mod, pos)
- struct thdr mod[]; /* Model of current open element. */
- struct mpos pos[]; /* Position in open element's model. */
- {
- UNCH saveM; /* Save M when testing if group is not required.*/
- int rc; /* 1=contextually optional; 0=not. */
-
- if (P==1) return(BITON(GOCC, TOPT) || TOKENHIT);
- saveM = M; M = G; --P;
- rc = tokenopt(mod, pos);
- ++P; G = M; M = saveM;
- return(rc);
- }
- /******************************************************************************/
- /* TOKENREQ: Returns RCREQ if the current token is "contextually required".
- That is, it is not contextually optional and
- 1) it is a member of a "seq" group that is either required
- or has at least 1 hit token.
- 2) it is a member of an "and" group in which all other
- tokens were hit.
- Optional tokens are not counted
- if GI is ETDCDATA, as we are looking for an
- omitted start-tag. Otherwise, they are counted,
- as the GI might match one of them.
- Returns RCNREQ if the current token is "not required".
- */
- int tokenreq(gi, mod, pos)
- struct etd *gi; /* ETD of new GI. */
- struct thdr mod[]; /* Model of current open element. */
- struct mpos pos[]; /* Position in open element's model. */
- {
- #ifndef FINAL
- if (ctrace) tracegi("TOKENREQ", gi, mod, pos, (int)Tstart);
- #endif
- return( tokenopt(mod, pos) ? RCNREQ
- : ( GTYPE==TTSEQ && (H!=0 || groupreq(gi, mod, pos)==RCREQ)
- /*|| GTYPE==TTAND && allhit(&GHDR, H, T, \*gi!=ETDCDATA*\ 1)*/ )
- ? RCREQ : RCNREQ );
- }
- /******************************************************************************/
- /* GROUPREQ: Temporarily makes the current group be the current token so that
- TOKENREQ() can be applied to it. Returns the value returned
- by TOKENREQ.
- */
- int groupreq(gi, mod, pos)
- struct etd *gi; /* ETD of new GI. */
- struct thdr mod[]; /* Model of current open element. */
- struct mpos pos[]; /* Position in open element's model. */
- {
- UNCH saveM; /* Save M when testing if group is not required.*/
- int rc; /* Return code: RCREQ RCNREQ */
-
- if (P==1) return(BITOFF(GOCC, TOPT) ? RCREQ : RCNREQ);
- saveM = M; M = G; --P;
- rc = tokenreq(gi, mod, pos);
- ++P; G = M; M = saveM;
- return(rc);
- }
- /******************************************************************************/
- /* GRPSZ: Returns the number of tokens spanned by a group in the model (M),
- from the group's start (G) to a specified index within the group (T).
- M = 0, plus 1 for each token in the group, plus the size of
- any subgroups (gotten by calling GRPSZ recursively). On entry,
- M must be equal to G at the current level.
- */
- int grpsz(g, t)
- struct thdr *g; /* mod[G]: Ptr to group in the model. */
- int t; /* T: Index of last token in the group. */
- {
- struct thdr *p = g; /* Ptr to current token in the model. */
- int m = 0; /* Size of group (including nested groups). */
- int i = 0; /* Number of group members (loop counter). */
- UNS type; /* Token type (without TOREP bits). */
-
- while (++i<=t) {
- ++p; ++m;
- type = GET(p->ttype, TTMASK);
- if (type==TTOR || type==TTSEQ || type==TTAND) {
- m += grpsz(p, p->tu.tnum);
- p = g+m;
- }
- }
- return(m);
- }
- /******************************************************************************/
- /* ALLHIT: Returns 1 if all hit bits for the specified group are turned on,
- (other than those that correspond to optional tokens if "opt" is
- 0) and the "but" bit (all bits if "but" bit is zero). Otherwise,
- returns 0. GRPSZ is used to skip past subgroup tokens.
- */
- int allhit(p, hits, but, opt)
- struct thdr *p; /* mod[G]: Ptr to group in the model. */
- long hits; /* H: Hit bits to be tested. */
- int but; /* Index of bit to ignore; 0=test all. */
- int opt; /* 1=optional tokens must be hit; 0=ignore. */
- {
- int b = 0; /* Index of bit being tested in hits. */
- int e = p->tu.tnum; /* Ending index (number of bits to test). */
- unsigned type; /* Token type (without TOREP bits). */
-
- while (++p, ++b<=e) {
- if (BITOFF(hits,1L<<(b-1)) &&(opt || BITOFF(p->ttype,TOPT)) && b!=but)
- return 0;
- if ((type = GET(p->ttype,TTMASK))==TTOR || type==TTSEQ || type==TTAND)
- p += grpsz(p, p->tu.tnum);
- }
- return 1;
- }
- /******************************************************************************/
/* OFFBIT: Returns the index of the first unset bit after (i.e., not including)
           the caller's "first" bit, looking no further than the "last" bit.
           Bit indexes are 1-based: index 1 is the least significant bit.
           If all bits through the specified last bit are on, it returns 0.

           The scan uses an unsigned mask that is moved up one bit at a
           time, so no shift count is ever negative or as large as the
           word, and no signed value is shifted into the sign bit.  Indexes
           beyond the width of a long cannot be represented in "bits"; the
           scan stops at the top of the word and returns 0 for them.  A
           negative "first" behaves like 0.
*/
int offbit(bits, first, last)
long bits;                    /* Bits to be tested. */
int first;                    /* Index of first bit to be tested in bits. */
int last;                     /* Index of last bit to be tested in bits. */
{
    unsigned long word = (unsigned long)bits;  /* Same bit pattern, unsigned. */
    unsigned long mask = 1UL;                  /* Selects bit "idx"; 0 once past the top bit. */
    int idx;                                   /* 1-based index of the bit under mask. */

    for (idx = 1; idx<=last && mask!=0; ++idx, mask <<= 1) {
        if (idx>first && (word & mask)==0) return idx;
    }
    return 0;
}
- /******************************************************************************/
- #undef GI /* From here to the end of the list: remove every shorthand macro defined at the top of this file. */
- #undef NEWGI
- #undef STATUS
- #undef RCEND
- #undef RCREQ
- #undef RCMISS
- #undef RCHIT
- #undef RCMEX
- #undef RCHITMEX
- #undef RCPEX
- #undef RCNREQ
- #undef PEX
- #undef M
- #undef P
- #undef G
- #undef T
- #undef H
- #undef GHDR
- #undef TOKEN
- #undef TTYPE
- #undef TOCC
- #undef GTYPE
- #undef GOCC
- #undef GNUM
- #undef TOKENHIT
- /******************************************************************************/
-